# -*- coding:utf-8 -*-
# @Author: shenyuyu
# @Time: 2023/6/25 15:29
# @File: qu_1.py

from pyspark import SparkConf, SparkContext
import json

if __name__ == '__main__':
    # Spark job: print each stage of a pipeline that extracts the distinct
    # product categories ordered in Beijing from an order log on HDFS.
    conf = SparkConf().setAppName("order")
    sc = SparkContext(conf=conf)

    try:
        # Each line of the input file contains several JSON order records
        # joined by "|" -- TODO confirm against the actual file format.
        rdd1 = sc.textFile("hdfs://hadoop1:9820/order.text")
        print(rdd1.collect())

        # Split every line into the individual JSON record strings.
        rdd2 = rdd1.flatMap(lambda x: x.split("|"))
        print(rdd2.collect())

        # Parse each JSON string into a dict.
        rdd3 = rdd2.map(lambda x: json.loads(x))
        print(rdd3.collect())

        # Keep only the orders whose areaName is Beijing ("北京").
        rdd4 = rdd3.filter(lambda x: x["areaName"] == "北京")
        print(rdd4.collect())

        # Project the category field and deduplicate.
        rdd5 = rdd4.map(lambda x: x["category"]).distinct()
        print(rdd5.collect())
    finally:
        # Fix: the original script never released the SparkContext; stop it
        # even if a stage fails so the application shuts down cleanly.
        sc.stop()